** BPP code for The Effects of Changes in Local Bank Health on Household Consumption
** by Daniel Cooper and Joe Peek, ReStat 2020
/* ////////////////////////////////////////////////////////////////////////// */

/* This code does the PSID part of the procedure of imputing nondurable consumption 
based on the approach in BPP (2006).  This program is run after running BPP_cons_impute_cex.do 
as it pulls the relevant regression coefficients generated by that program for the imputation. */

* turn off the -more- pause so long output does not halt the do-file waiting for a keypress
set more off

/* The code is divided into two blocks for ease of execution.  

--The first block "dataclean" reads in the relevant PSID data as well as the CPI 
price data needed for the imputation. It does some minor data cleaning and then makes the 
necessary sample restrictions and series transformations needed for the BPP imputation procedure. 

--The second block "impute" uses the cleaned/manipulated data from "dataclean" to 
impute nondurable consumption based on the estimated relationships in the CEX. */

* Working directory: point this at the folder that holds the input datasets
* (psiddata_bpp, pricescex, imputebetas_bpp); outputs are saved there too.
cd ~/cex/imputation_dhc/update2017/

* Execution switches: 1 runs the corresponding block below, 0 skips it.
local dataclean 1
local impute    1

 if `dataclean' {
	* Load the PSID extract.
	use psiddata_bpp, clear

	* Attach the price series by year (see comments in BPP_cons_impute.do for
	* more details). Rows that exist only in the price file are discarded.
	merge m:1 year using pricescex
	keep if _merge != 2
	drop _merge

	* The sample starts in 1980.
	keep if year >= 1980

	* Region and education are both needed for the imputation, so rows where
	* either one is missing (.) are removed.
	keep if region != .
	keep if educh  != .

	* Age window 25-65. BPP restrict to 30-65 year olds; the lower bound is
	* relaxed here so that consumption is also imputed for younger households.
	* Rows with missing ageh fall outside the window and are removed as well.
	keep if inrange(ageh, 25, 65)

	* Birth year (by) = survey year minus age; keep birth years of 1920 or later.
	gen by = year - ageh
	keep if by >= 1920

	save psiddat_in, replace
}

if `impute' {
* Start from the cleaned sample written by the dataclean block.
use psiddat_in, clear

* Log transformations needed for the imputation:
*   lnfood  = log of food expenditure as recorded
*   lnfoodr = log of food expenditure scaled by 100/pricefood
* Note that there is no food expenditure data in 1988 and 1989 in the PSID.
generate lnfood  = log(food)
generate lnfoodr = log(100*food/pricefood)

*child dummies*
* child0/child1/child2 flag households with exactly 0/1/2 children and child3
* flags households with 3 or more.
*
* The indicators are built from the VALUE of numkids rather than from the
* position of each category in -tab numkids, gen(child)-. The positional
* version hard-coded child4..child8: it stopped with an error when numkids had
* fewer than 8 distinct values, and when a 9th category was present those
* households were left with child1=child2=child3=0, i.e. treated as childless.
*
* NOTE(review): this assumes numkids is a count whose smallest value is 0,
* which is what the earlier "rename child1 child0" also relied on - confirm.
* All four indicators are missing when numkids is missing.
gen child0 = (numkids == 0) if !missing(numkids)
gen child1 = (numkids == 1) if !missing(numkids)
gen child2 = (numkids == 2) if !missing(numkids)
gen child3 = (numkids >= 3) if !missing(numkids)

* Quadratic age term.
generate agehsq = ageh^2

* Region indicators reg1, reg2, ... one per distinct value of region, numbered
* in ascending order of the region code.
* (The BPP and CEX code drops region4 (west) in the analysis.)
tabulate region, generate(reg)

* Education: keep the raw codes under *_upd names, then collapse them into
* three groups (code 1, code 2, codes 3-5).
rename educh educh_upd
rename educw educw_upd

generate ed0 = educh_upd == 1
generate ed1 = educh_upd == 2
generate ed2 = inlist(educh_upd, 3, 4, 5)

generate edw1 = educw_upd == 1
generate edw2 = educw_upd == 2
generate edw3 = inlist(educw_upd, 3, 4, 5)

* sample start in 1980
* Year indicators yr1980-yr1997 (annual) and yr1999-yr2015 (every second year),
* the names used by the slope terms of the lc equation.
*
* Each indicator is defined from the value of year. The earlier approach
* (-tab year, gen(yr)- followed by renaming yr1..yr27 by position) only gave
* correct labels when every one of these 27 years, and no other year, was
* present in the data: a missing or additional wave shifted the labels without
* any error, so the wrong year coefficient was applied.
* As with -tab, gen()-, the indicators are missing when year is missing.
foreach y of numlist 1980/1997 1999(2)2015 {
	gen byte yr`y' = (year == `y') if !missing(year)
}
 
capture drop coh

* Five-year birth cohorts: coh = 1 for by in [1920,1925), 2 for [1925,1930),
* ..., 13 for [1980,1985). coh is missing outside 1920-1984.
* NOTE(review): with ages down to 25 and survey years up to 2015, birth years
* of 1985 or later can occur; those rows get missing coh, missing cohort
* dummies and therefore missing lc - confirm that this is intended.
gen coh = floor((by - 1920)/5) + 1 if by >= 1920 & by < 1985

* Cohort indicators cohd1-cohd13 defined from the value of coh. The earlier
* -tab coh, gen(cohd)- numbered the dummies by position, so an empty cohort
* bin would have shifted cohd2-cohd13 relative to their coefficients.
* The indicators are missing when coh is missing, as with -tab, gen()-.
forvalues k = 1/13 {
	gen byte cohd`k' = (coh == `k') if !missing(coh)
}

*male*
* 1 when sexh equals 1, 0 otherwise (a missing sexh also yields 0).
* male does not enter the lc equation below and is not among the variables kept at the end.
gen male =(sexh==1)

***********************
* Impute consumption  *
***********************
/* generate imputed log consumption measure (lc) based on households' food expenditures 
and coefficients from the CEX. 
Remember that the BPP CEX approach regresses food expenditure on nondurable consumption, so to 
generate a measure of nondurable expenditures the equation has to be inverted*/

/* also note that income-related variables are used to "fit" the equation in the CEX given 
the IV approach, but are not used in the prediction */

* Attach the CEX coefficients (bi0_*) to every PSID row by year.
* The explicit m:1 form matches the price merge in the dataclean block and
* replaces the deprecated "merge year using" syntax.
* NOTE(review): m:1 requires year to identify rows uniquely in
* imputebetas_bpp; if it does not, the merge stops with an error instead of
* pairing rows silently - confirm against BPP_cons_impute_cex.do.
* Rows found only in the coefficient file are dropped; they previously stayed
* in memory and were written to the output with a missing household id.
sort year
merge m:1 year using imputebetas_bpp
drop if _merge==2
drop _merge
   
		/* Imputed log consumption lc, obtained by inverting the food equation:
		     lc = (lnfood - level part) / slope
		   level part = sum of the bi0_ coefficients times age, age squared, the
		     four log price variables, child1-child3, cohd2-cohd13, ed1, ed2,
		     famsize, white and reg1-reg3, plus the constant bi0_cons.
		   slope = bi0_lc plus shifts for ed1, ed2, child1-child3 and the year
		     dummies yr1981-yr2015; there is no yr1980 term in the slope.
		   lc is missing whenever any variable entering the expression is missing.
		   NOTE(review): the slope pairs bi0_lced2 with ed1 and bi0_lced3 with ed2,
		     whereas the level part pairs bi0_edu1 with ed1 and bi0_edu2 with ed2;
		     confirm this against the coefficient names saved by the CEX program.
		   NOTE(review): lnpricefood, lnpricefuel, lnpricealc, lnpricetrans, famsize
		     and white are not created in this file; presumably they come from the
		     merged input datasets - verify.
		   The command runs over many lines, so the delimiter is switched to ";"
		   for its duration and restored to carriage return afterwards. */
		#delimit ;
		gen lc=(lnfood-(
		bi0_age    *ageh+
		bi0_age2   *agehsq+
		
		bi0_lpf     *lnpricefood+
		bi0_lpfuel  *lnpricefuel+
        bi0_lpalc   *lnpricealc+
        bi0_lptr    *lnpricetrans+

        bi0_child1  *child1+
        bi0_child2  *child2+ 
        bi0_child3  *child3+
			
		bi0_cohd2  *cohd2+
		bi0_cohd3  *cohd3+
		bi0_cohd4  *cohd4+
		bi0_cohd5  *cohd5+
		bi0_cohd6  *cohd6+
		bi0_cohd7  *cohd7+
		bi0_cohd8  *cohd8+
		bi0_cohd9  *cohd9+
		bi0_cohd10  *cohd10+
		bi0_cohd11 *cohd11+
		bi0_cohd12 *cohd12+
		bi0_cohd13 *cohd13+
		bi0_edu1   *ed1+
		bi0_edu2   *ed2+ 
		
		bi0_famsize  *famsize+ 
		bi0_race  *white+
      
        bi0_reg1  *reg1+
		bi0_reg2  *reg2+	
		bi0_reg3  *reg3+

		bi0_cons  ))

		/(bi0_lc+bi0_lced2*ed1+bi0_lced3*ed2 + bi0_lch1*child1 + bi0_lch2*child2 + bi0_lch3*child3
		+bi0_lc1981*yr1981+bi0_lc1982*yr1982+bi0_lc1983*yr1983+bi0_lc1984*yr1984
		+bi0_lc1985*yr1985+bi0_lc1986*yr1986+bi0_lc1987*yr1987+bi0_lc1988*yr1988+bi0_lc1989*yr1989
		+bi0_lc1990*yr1990+bi0_lc1991*yr1991+bi0_lc1992*yr1992 +bi0_lc1993*yr1993+bi0_lc1994*yr1994+bi0_lc1995*yr1995 
        +bi0_lc1996*yr1996+bi0_lc1997*yr1997 +bi0_lc1999*yr1999+bi0_lc2001*yr2001+bi0_lc2003*yr2003
        +bi0_lc2005*yr2005+bi0_lc2007*yr2007 +bi0_lc2009*yr2009 +bi0_lc2011*yr2011 +bi0_lc2013*yr2013 +bi0_lc2015*yr2015
		) ;
		#delimit cr


* Trimming of non-positive imputed values is currently switched off:
*replace lc=. if lc<=0

* Give the imputed series its final name, keep it together with the unique and
* year keys only, order the rows by (unique, year) and write the result.
rename lc broadc_bppv2
keep year unique broadc_bppv2
sort unique year
save bpp_consv5, replace
}
